## Filter down each data set into the appropriate subset of weeks
leagueDat       <- leagueDat.MASTER
dat             <- dat.MASTER             %>% filter(week <= currentWeek)
lineUps         <- lineUps.MASTER         %>% filter(week <= currentWeek)
playerscoresDat <- playerscoresDat.MASTER %>% filter(week <= currentWeek)
all.sched       <- all.sched.MASTER       %>% filter(week <= currentWeek)

leagueDatttt <- leagueDat %>% rename(teamID=ID)


## collapse results at schedule level
allSchedSum <- all.sched %>%
  group_by(nschedule, teamID) %>%
  summarize(nWins = sum(winner)) %>% 
  ungroup %>%
  mutate(count = 1)


## collapse results at team level
allResults <- allSchedSum %>%
  group_by(teamID, nWins) %>%
  summarise(nResults = sum(count)) %>%
  merge(leagueDatttt)

Total Wins Approach

This approach sums, for each week, how many other teams you would have beaten. A team’s expected win percentage is then calculated as the number of actual possible wins, divided by the maximum number of possible wins possible, 9*number of weeks.

############################ x
### Checking Total Week Wins
############################ x  

datWeekWins <- dat %>%
  # Generate number of weekwins per week for each team
  group_by(week) %>%
  mutate(nwins = order(order(points, decreasing = FALSE)) - 1) %>%
  ungroup %>% group_by(owner) %>%
  
  # Sum the week wins for each team
  summarise(weekwins = sum(nwins), wins = sum(winner)) %>%
  
  # Generate expected wins and difference between actual and expected
  mutate(potentialWins = currentWeek * 9,
         expectedWins  = (weekwins / potentialWins) * currentWeek) %>%
  mutate(diference = wins - expectedWins) %>%
 
  # Format resutls
  select(-c(potentialWins)) %>%
  mutate(expectedWins = round(expectedWins, 2),
         diference    = round(diference,    2)) %>%
  arrange(desc(expectedWins))

  # Name Columns
  names(datWeekWins) <-  c("Team", "Week Wins", "Actual Wins", "Expected Wins", 
                           "Actual Wins - Expected Wins")

datWeekWins.LIST[[currentWeek]] <- datWeekWins %>% mutate(reportNo = REPORTNO)
  DT::datatable(datWeekWins, options = list(dom = 't'), rownames = F)
  if (REPORTNO == thisWeek) {
    ## Append the weekly datasets from the weekly reports
    datWeekWins.AllWeeks   <- plyr::rbind.fill(datWeekWins.LIST)
    
    weekWinsSummary <- datWeekWins.AllWeeks %>%
      mutate("Expected Win Percent" = round(`Expected Wins`/reportNo,2)) %>%
      rename(Week = reportNo) %>% 
      ggplot(aes(x = Week, y = `Expected Win Percent`)) +
      geom_line(aes(color=Team)) +
      geom_point(aes(color=Team)) + 
      scale_x_continuous(breaks=seq(0,currentWeek,1)) + 
      ggtitle("Expected Win Percentage by Week: Total Week Wins")
    
    ggplotly(weekWinsSummary)
  }



Pythagorean Wins Analysis

This analysis tries to calculate expected wins as a function of points for, and a proxy for points against. The exponent used is 13.91.

## Function to return average points for all teams except a given team
butme <- function(x, team) {
  x %>%
    group_by(owner) %>% 
    summarize(pf = sum(points)) %>%
    filter(owner != team) -> .
  return(as.numeric(mean(.$pf)))
}

## create dataframe of average of teams except for a team's points for
against.proxy <- vector(length = length(unique(dat$owner)))
for (i in 1:length(unique(dat$owner))) against.proxy[i] <- butme(dat, unique(dat$owner)[i])
againstProxies <- data.frame(owner = unique(dat$owner), proxy= against.proxy)

## Power for calculation
f = 13.91

## Create Table
pyThagStats <- dat %>% 
  # Get total points for each owner
  group_by(owner) %>% 
  summarize(points = sum(points), wins = sum(winner)) %>% 
  
  # Merge on average points against proxy
  merge(againstProxies) %>%
  
  # Calculate expected wins and difference
  mutate(winratio = points^f/(points^f + proxy^f)) %>%
  mutate(expectedWins = winratio*currentWeek) %>%
  mutate(difference = wins - expectedWins) %>%
  
  # Formatting
  select(-c(points, proxy, winratio)) %>%
  mutate(expectedWins = round(expectedWins, 2), difference = round(difference, 2)) %>%
  arrange(desc(expectedWins))
  
  # Name Columns
  names(pyThagStats) <- c("Team", "Actual Wins", "Expected Wins", 
                          "Actual Wins - Expected Wins")

rm(againstProxies)

pyThagStats.LIST[[currentWeek]] <- pyThagStats %>% mutate(reportNo = REPORTNO)

DT::datatable(pyThagStats, options = list(dom = 't'), rownames = F)



All Schedules Simulated

Our league is setup so that each team plays each of the other 9 teams once during the first 9 weeks of the season, and then replays the first 4 teams they played. There are 362,880 such possible schedules.

############################ x
### Simulate All Schedules Analysis
############################ x 

forMerge   <- leagueDat  %>% select(-c(fullname))
allResults <- allResults %>% select(-c(fullname)) %>% merge(forMerge)


## Faceted histogram of wins
betterName <- ggplot(allResults, aes(x=nWins, y =nResults)) + 
  geom_bar(stat = "identity") +
  facet_wrap(~owner, ncol = 2)


## max/min table
maxMinTable <- allResults %>%
  # Min and Max Wins by Owner
  group_by(owner) %>%
  summarise(minWins = min(nWins), 
            maxWins = max(nWins)) %>%
  
  # Merge on number of schedules with each result
  merge(allResults) %>%
  filter(nWins == minWins | nWins == maxWins) %>%
  mutate(minOrMax = ifelse(nWins == minWins, "min", "max")) %>%
  
  # Formatting
  select(-c(nWins, teamID, ID, teamID)) %>%
  reshape(timevar = "minOrMax", 
          idvar = c("owner", "minWins", "maxWins"), 
          direction = "wide") %>%
  arrange(desc(maxWins),desc(nResults.max))
  
  # Name Columns
  names(maxMinTable) <-  c("Team", "Min Wins Possible", "Max Wins Possible", "# Min", "# Max")

rm(allResults)

Distribution of Wins over All Potential Schedules



Max and Min Wins Possible

This table shows, for each team, the maximum number of wins and the minimum number of wins that each team could have achieved over all possible schedules. It also shows out of the 362880 possible schedules, how many times a team achieves that number of wins.



Start/Sit Efficiency Analysis

############################ x
### Start sit analysis
############################ x

## Total Lost Points and average maxscore
merged2 <- lineUps %>% 
  # Generate Lost Points - i.e. difference between Actual Score and Max Possible
  mutate(lostPoints = ActualMax - points) %>% 
  
  # Summary of Stats per Team
  group_by(owner) %>% 
  summarise(lostPoints = mean(lostPoints), 
            avgOpt = mean(ActualMax), 
            avgPoints = mean(points)) %>% 
  
  # Format results
  mutate(lostPoints = round(lostPoints, 2), 
         avgOpt = round(avgOpt, 2), 
         avgPoints = round(avgPoints, 2)) %>%
  arrange(lostPoints)
  
  # Name Columns
  names(merged2) <- c("Team", "Average Lost Points", "Average Optimal Points", "Average Points")

merged2.LIST[[currentWeek]] <- merged2 %>% mutate(reportNo = REPORTNO)
  
## Who has perfect weeks?
perfectWeeks = lineUps %>% filter(ActualMax == points) 


## Barbell plot data munging 
dattt <- lineUps %>%  
  group_by(owner) %>% 
  summarise(avgPoints = mean(points), 
            avgOpt = mean(ActualMax)) %>% 
  arrange(desc(avgPoints))

  # Save Owners as Factor for Plot Sorting
  dattt$owner <- factor(dattt$owner, levels=as.character(dattt$owner)) 


  # Plot 
  gg <- ggplot(dattt, aes(x=avgPoints, xend=avgOpt, y=owner, group=owner)) + 
  geom_dumbbell(color="#a3c4dc", 
                size=2,
                colour_x = "blue",
                colour_xend = "blue",
                show.legend = TRUE) + 
  labs(x=NULL, y=NULL, title="Average Actual Points vs. Possible Points") + 
  barbbellTheme


## Optimal Lineup records - If everyone played optimal lineups
oppLineUps <- lineUps %>% 
  select(week, ID, ActualMax) %>% 
  rename(oppID = ID, oppMaxScore = ActualMax) 

  mergdOppScore <- lineUps %>% 
  # Merge on oppponent optimal score
  inner_join(oppLineUps) %>% 
  
  # Identify how many wins each team would get
  mutate(optWinner = ifelse(ActualMax > oppMaxScore, 1, 0)) %>%
  group_by(owner) %>% 
  summarize(winsOptimal = sum(optWinner), 
            actualWins = sum(winner)) %>%
  mutate(difference = winsOptimal - actualWins)
## Joining, by = c("week", "oppID")
  # Name Columns
  names(mergdOppScore) <- c("Team", "Optimal Lineup Wins", "Actual Wins", "Optimal - Actual Wins")

Optimal Records

This table contains each teams records if they and their opponent played their optimal lineups each week.



Average Points Left on Bench





Actuals vs Projections

This section of analysis concerns projections.

## Who follows projections? Who Beats/Loses to them?
ratingsWhores <- lineUps %>% 
  # Get number of times of each result per owner
  mutate(espnVsOwner = if_else(points == ProjScore, "Equal to ESPN", 
                       if_else(points > ProjScore, "Beat ESPN", "ESPN Better"))) %>%
  mutate(count = 1) %>% 
  group_by(owner, espnVsOwner) %>% 
  summarise(nTimes = sum(count)) %>% 
  
  # Reshape data & format for output
  spread(espnVsOwner, nTimes) %>% 
  mutate(`Equal to ESPN` = ifelse(is.na(`Equal to ESPN`),0,`Equal to ESPN`),
         `Beat ESPN`     = ifelse(is.na(`Beat ESPN`),    0,`Beat ESPN`),
         `ESPN Better`   = ifelse(is.na(`ESPN Better`),  0,`ESPN Better`))  %>%
  arrange(desc(`Equal to ESPN`)) %>%
  rename(Team = owner) 

ratingsWhores.LIST[[currentWeek]] <- ratingsWhores %>% mutate(reportNo = REPORTNO)

## Number of Wins if following ESPN lineups
espnVsOwner <- lineUps %>% 
  group_by(owner) %>% 
  summarize(espnWins = sum(espnWinner), 
            ownerWins = sum(winner)) %>% 
  mutate(difference = espnWins - ownerWins) %>% 
  arrange(desc(difference))
  
  # name Columns
  names(espnVsOwner) <- c("Team", "ESPN Wins", "Actual Wins", "ESPN - Actual Wins")

  
## Barbell plot - ESPN lineups vs actual lineups
dattt <- lineUps %>% 
  # Summarize number of ESPN wins and actual wins
  group_by(owner) %>% 
  summarise(avgPoints = mean(points), 
            avgESPN = mean(ProjScore)) %>% 
  arrange(desc(avgPoints))
  
  # Name owners as factors for plot sort
  dattt$owner <- factor(dattt$owner, levels=as.character(dattt$owner)) 
  
  # Plot data
  gg <- ggplot(dattt, aes(x=avgPoints, xend=avgESPN, y=owner, group=owner)) + 
  geom_dumbbell(color="#a3c4dc", 
                size=2,
                colour_x = "blue",
                colour_xend = "red",
                show.legend = TRUE) + 
  labs(x=NULL, 
       y=NULL, 
       title="Owner Linups (Blue) vs. ESPN Lineups (Red)")  +
  barbbellTheme



Lineup Skill

This plot contains the average score of the user set lineup, and the average score of the lineup that was projected by ESPN to score the most each week.



This table contains each owners record if they started the lineup that was projected by ESPN to score the most each week.



Who Follows Projections?

This table containst the number of times that each team played the lineup that was projected by ESPN to score the most each week.



Positional Strengths and Weaknesses

Average Points by Position and Team

Note that a teams WR1/RB1 is the WR/RB each week that scored the most points.

## Table of average points per position per team
actualScores <- playerscoresDat %>% 
  # Filter lineups to those who played
  filter(slotID == possSlots & slotID != 20) %>%
  
  # identify RB/WR 1 vs 2
  arrange(ID, slotID, week, desc(points)) %>% 
  group_by(week, ID, slotID) %>% 
  mutate(posNum = seq_along(points)) %>%
  ungroup %>% 
  
  # Summarize average points
  group_by(ID, slotID, posNum) %>% 
  summarise(avgPoints = mean(points)) %>%
  
  # Join on owner and position data
  inner_join(leagueDat, by = "ID") %>%
  mutate(position = ifelse(slotID == 2 & posNum == 1, "RB1", ifelse(slotID == 2 & posNum == 2, "RB2",
                    ifelse(slotID == 4 & posNum == 1, "WR1", ifelse(slotID == 4 & posNum == 2, "WR2",
                    ifelse(slotID == 0, "QB",                ifelse(slotID == 6,  "TE",
                    ifelse(slotID == 16, "DST",              ifelse(slotID == 17, "K",
                    ifelse(slotID == 23, "FLEX", "")))))))))) %>% 
  ungroup %>% 
  select(c(owner, position, avgPoints)) %>%
  
  # average points per postion across teams
  group_by(position) %>% 
  mutate(avgPosPoints = mean(avgPoints)) %>%
  mutate(pointsOverAvg = round(100*(avgPoints - avgPosPoints)/avgPosPoints,0))


## Table of points per position - all formatting basically
pointsByPosTab <- actualScores %>%
  select(c(owner, position, avgPoints)) %>%
  mutate(avgPoints = round(avgPoints, 2)) %>%
  spread(position, avgPoints) %>%
  rename(Owner = owner) %>%
  mutate(Total = QB+WR1+WR2+RB1+RB2+TE+DST+K+FLEX) %>%
  arrange(desc(Total))

## Max value for plot scale
maxValue <- max(abs(actualScores$pointsOverAvg))

## Grid plot
ggg <- ggplot(actualScores, aes(owner, position, fill = pointsOverAvg)) + 
  geom_tile(colour = "white") + 
  geom_text(aes(label=pointsOverAvg)) +
  scale_fill_gradientn(colors=c("red","white","green"),
                       values=rescale(c(-maxValue,0,maxValue)),
                       limits=c(-maxValue,maxValue)) +
  labs(x="Owner", y="Percent Above Average in League",
       title = "Points by Position", fill = "")



Points Above Average by Position



 

A work by Luke Wilson

lvzwilson@gmail.com